#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <cuda.h>

__global__ void addKernel(int *c, const int *a, const int *b) {
    int i = threadIdx.x;
    c[i] = a[i] + b[i];
}

int main() {
    const int arraySize = 128;
    const int a[arraySize] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,9,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,5,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,1,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128};
    const int b[arraySize] = {10,20,30,40,50,60,70,80,90,100,110,120,130,140,150,160,170,180,190,200,210,220,230,240,250,260,270,280,290,300,310,320,330,340,350,360,370,380,390,400,410,420,430,440,450,460 ,470,480 ,490,500,510,520,530,540,550,560,570,580,590,600,610,620,630,640,650,660,670,680,690,700,710,720,730,740,750,760,770,780,790,800,810,820,830,840,850,860,870,880,890,900,910,920,930,940,950,960,970,980,990,1000,1010,1020,1030,1040,1050,1060,1070,1080,1090,1100,1110,1120,1130,140,1150,1160,1170,1180,190,1200,1210,1220,1230,1240,1250,1260,1270,1280,};
    int c[arraySize] = {0};
    CUresult cudaStatus;
    int *dev_a, *dev_b, *dev_c;
    //Allocate GPU buffere for three vectors (two inputs, one outputs)
    cudaMalloc((void **) &dev_c, arraySize * sizeof(int));
    cudaMalloc((void **) &dev_a, arraySize * sizeof(int));
    cudaMalloc((void **) &dev_b, arraySize * sizeof(int));
    //Copy input vectors from host memory to GPU buffers
    cudaMemcpy(dev_a, a, arraySize * sizeof(int), cudaMemcpyHostToDevice);
    cudaMemcpy(dev_b, b, arraySize * sizeof(int), cudaMemcpyHostToDevice);
    //Launch a kernel on the GPU with one thread for each element
    addKernel<<<1, arraySize>>>(dev_c, dev_a, dev_b);
    //Check errors
    cudaStatus = cudaGetLastError();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "addvector failed: %s\n", cudaGetErrorString(cudaStatus));
        fprintf(stderr, "%s\n", cudaGetErrorString(cudaStatus));
        goto Error;
    }
    //any errors return
    cudaStatus = cudaDeviceSynchronize();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaDeviceSynchronize returned %d\n", cudaStatus);
        fprintf(stderr, "%s\n", cudaGetErrorString(cudaStatus));
        goto Error;
    }
    //Copy output vector from GPU buffer to host memory
    cudaMemcpy(c, dev_c, arraySize * sizeof(int), cudaMemcpyDeviceToHost);
    cudaFree(dev_c);
    cudaFree(dev_a);
    cudaFree(dev_b);
    printf("{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,9,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,5,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,1,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,}\n + \n{10,20,30,40,50,60,70,80,90,100,110,120,130,140,150,160,170,180,190,200,210,220,230,240,250,260,270,280,290,300,310,320,330,340,350,360,370,380,390,400,410,420,430,440,450,460 ,470,480 ,490,500,510,520,530,540,550,560,570,580,590,600,610,620,630,640,650,660,670,680,690,700,710,720,730,740,750,760,770,780,790,800,810,820,830,840,850,860,870,880,890,900,910,920,930,940,950,960,970,980,990,1000,1010,1020,1030,1040,1050,1060,1070,1080,1090,1100,1110,1120,1130,140,1150,1160,1170,1180,190,1200,1210,1220,1230,1240,1250,1260,1270,1280,} \n = \n{%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,}\n", c[0],c[1],c[2],c[3],c[4],c[5],c[6],c[7],c[8],c[9],c[10],c[11],c[12],c[13],c[14],c[15],c[16],c[17],c[18],c[19],c[20],c[21],c[22],c[23],c[24],c[25],c[26],c[27],c[28],c[29],c[30],c[31],c[32],c[33],c[34],c[35],c[36],c[37],c[38],c[39],c[40],c[41],c[42],c[43],c[44],c[45],c[46],c[47],c[48],c[49],c[50],c[51],c[52],c[53],c[54],c[55],c[56],c[57],c[58],c[59],c[60],c[61],c[62],c[63],c[64],c[65],c[66],c[67],c[68],c[69],c[70],c[71],c[72],c[73],c[74],c[75],c[76],c[77],c[78],c[79],c[80],c[81],c[82],c[83],c[84],c[85],c[86],c[87],c[88],c[89],c[90],c[91],c[92],c[93],c[94],c[95],c[96],c[97],c[98],c[99],c[100],c[101],c[102],c[103],c[104],c[105],c[106],c[107],c[108],c[109],c[110],c[111],c[112],c[113],c[114],c[115],c[116],c[117],c[118],c[119],c[120],c[121],c[122],c[123],c[124],c[125],c[126],c[127]);
    getchar();
    return 0;
}